# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding: utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- http://mdanalysis.googlecode.com
# Copyright (c) 2006-2011 Naveen Michaud-Agrawal,
#               Elizabeth J. Denning, Oliver Beckstein,
#               and contributors (see website for details)
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
#     N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and
#     O. Beckstein. MDAnalysis: A Toolkit for the Analysis of
#     Molecular Dynamics Simulations. J. Comput. Chem. 32 (2011), 2319--2327,
#     doi:10.1002/jcc.21787
#

"""
TRJ/MDCRD file format (Amber) --- :mod:`MDAnalysis.coordinates.TRJ`
===================================================================

Classes to read formatted Amber_ TRJ coordinate files as defined in `Amber
TRJ format`_. It is also possible to directly read *bzip2* or *gzip*
compressed files.

Amber trajectories are recognised by the suffix '.trj' or '.mdcrd'
(possibly with an additional '.gz' or '.bz2').

.. Note::

   Support for Amber is *experimental* and feedback and contributions
   are highly appreciated. Use the `Issue Tracker`_ or get in touch on
   the `MDAnalysis mailinglist`_.

.. _Amber: http://ambermd.org
.. _Amber TRJ format: http://ambermd.org/formats.html#trajectory
.. _Issue Tracker: http://code.google.com/p/mdanalysis/issues/list
.. _MDAnalysis mailinglist: http://groups.google.com/group/mdnalysis-discussion

Units
-----

* lengths in Angstrom (Ã…)
* time in ps (but see below)


Limitations
-----------

* Reads the ASCII `Amber TRJ format`_, but not the `Amber netcdf`_ one
  (yet).

* Periodic boxes are only stored as box lengths A, B, C in an Amber
  trajectory; the reader always assumes that these are orthorhombic
  boxes.

* The trajectory does not contain time information so we simply set
  the time step to 1 ps (or the user could provide it as kwarg *delta*)

* No direct access of frames is implemented, only iteration through
  the trajectory.

* Trajectories with fewer than 4 atoms probably fail to be read (BUG).

* If the trajectory contains exactly *one* atom then it is always
  assumed to be non-periodic (for technical reasons).

.. _Amber netcdf: http://ambermd.org/netcdf/nctraj.html

Classes
-------

.. autoclass:: Timestep
   :members:
.. autoclass:: TRJReader
   :members:

"""

import numpy

import MDAnalysis
import base
import MDAnalysis.core.util as util

class Timestep(base.Timestep):
        """Amber trajectory Timestep"""
        @property
        def dimensions(self):
                """unitcell dimensions (`A, B, C, alpha, beta, gamma`)

                - `A, B, C` are the lengths of the primitive cell vectors `e1, e2, e3`
                - `alpha` = angle(`e1, e2`)
                - `beta` = angle(`e1, e3`)
                - `gamma` = angle(`e2, e3`)

                .. Note::

                   The Amber trajectory only contains box lengths
                   `A,B,C`; we assume an orthorhombic box and set all
                   angles to 90º.
                """
                # Layout of unitcell is [A,B,C,90,90,90] with the primitive cell vectors
                return self._unitcell

class TRJReader(base.Reader):
        """Amber trajectory reader.

        Reads the ASCII formatted `Amber TRJ format`_. Periodic box information
        is auto-detected.

        The number of atoms in a timestep *must* be provided in the `numatoms`
        keyword because it is not stored in the trajectory header and cannot be
        reliably autodetected. The constructor raises a :exc:`ValueError` if
        `numatoms` is left at its default value of ``None``.

        The length of a timestep is not stored in the trajectory itself but can
        be set by passing the `delta` keyword argument to the constructor; it
        is assumed to be in ps. The default value is 1 ps.

        Functionality is currently limited to simple iteration over the
        trajectory.

        .. _Amber TRJ format: http://ambermd.org/formats.html#trajectory
        """
        format = 'TRJ'
        units = {'time': 'ps', 'length': 'Angstroms'}
        _Timestep = Timestep

        # TODO: implement random access via seek
        #       - compute size of frame
        #       - compute seek offset & go
        #       - check that this works for files >2GB

        def __init__(self, trjfilename, numatoms=None, **kwargs):
                # amber trj REQUIRES the number of atoms from the topology
                if numatoms is None:
                        raise ValueError("Amber TRJ reader REQUIRES the numatoms keyword")
                self.filename = trjfilename
                self.__numatoms = numatoms
                self.__numframes = None

                self.trjfile = None  # have _read_next_timestep() open it properly!
                self.fixed = 0
                self.skip = 1
                self.skip_timestep = 1   # always 1 for trj at the moment
                self.delta = kwargs.pop("delta", 1.0)   # can set delta manually, default is 1ps
                self.ts = Timestep(self.numatoms)

                # FORMAT(10F8.3)  (X(i), Y(i), Z(i), i=1,NATOM)
                self.default_line_parser = util.FORTRANReader("10F8.3")
                self.lines_per_frame = int(numpy.ceil(3.0 * self.numatoms / len(self.default_line_parser)))
                # The last line per frame might have fewer than 10
                # We determine right away what parser we need for the last
                # line because it will be the same for all frames.
                last_per_line = 3 * self.numatoms % len(self.default_line_parser)
                self.last_line_parser = util.FORTRANReader("%dF8.3" % last_per_line)

                # FORMAT(10F8.3)  BOX(1), BOX(2), BOX(3)
                # is this always on a separate line??
                self.box_line_parser = util.FORTRANReader("3F8.3")

                # Now check for box
                self.periodic = False
                self._detect_amber_box()

                # open file, read first frame
                self._read_next_timestep()

        def _read_next_timestep(self):
                # FORMAT(10F8.3)  (X(i), Y(i), Z(i), i=1,NATOM)
                ts = self.ts
                if self.trjfile is None:
                        self.open_trajectory()

                # Read coordinat frame:
                #coordinates = numpy.zeros(3*self.numatoms, dtype=numpy.float32)
                _coords = []
                for number,line in enumerate(self.trjfile):
                        try:
                                _coords.extend(self.default_line_parser.read(line))
                        except ValueError:
                                # less than 10 entries on the line:
                                _coords.extend(self.last_line_parser.read(line))
                        if number == self.lines_per_frame - 1:
                                # read all atoms that are there in this frame
                                break
                if _coords == []:
                        # at the end of the stream (the loop has not been entered)
                        raise EOFError

                # Read box information
                if self.periodic:
                        line = self.trjfile.next()
                        box = self.box_line_parser.read(line)
                        ts._unitcell[:3] = numpy.array(box, dtype=numpy.float32)
                        ts._unitcell[3:] = [90.,90.,90.]  # assumed

                # probably slow ... could be optimized by storing the coordinates in X,Y,Z
                # lists or directly filling the array; the array/reshape is not good
                # because it creates an intermediate array
                ts._pos[:] = numpy.array(_coords).reshape(self.numatoms, 3)
                ts.frame += 1
                return ts

        def _detect_amber_box(self):
                """Detecting a box in a Amber trajectory

                Rewind trajectory and check for potential box data
                after the first frame.

                Set :attr:`TRJReader.periodic` to ``True`` if box was
                found, ``False`` otherwise.

                Only run at the beginning as it *rewinds* the trajctory.

                 - see if there's data after the atoms have been read that looks
                   like::

                     FORMAT(10F8.3)  BOX(1), BOX(2), BOX(3)
                     BOX    : size of periodic box

                 - this WILL fail if we have exactly 1 atom in the trajectory because
                   there's no way to distinguish the coordinates from the box
                   so for 1 atom we always assume no box
                 XXX: needs a Timestep that knows about Amber unitcells!
                """
                if self.numatoms == 1:
                        # for 1 atom we cannot detect the box with the current approach
                        self.periodic = False   # see _read_next_timestep()!
                        wmsg = "Trajectory contains a single atom: assuming periodic=False"
                        warnings.warn(wmsg)
                        return False

                self._reopen()
                self.periodic = False      # make sure that only coordinates are read
                self._read_next_timestep()
                ts = self.ts
                # TODO: what do we do with 1-frame trajectories? Try..except EOFError?
                line = self.trjfile.next()
                nentries = self.default_line_parser.number_of_matches(line)
                if nentries == 3:
                        self.periodic = True
                        ts._unitcell[:3] = self.box_line_parser.read(line)
                        ts._unitcell[3:] = [90.,90.,90.]  # assumed
                else:
                        self.periodic = False
                        ts._unitcell = numpy.zeros(6, numpy.float32)
                self.close()
                return self.periodic


        @property
        def numframes(self):
                """Number of frames (obtained from reading the whole trajectory)."""
                if not self.__numframes is None:   # return cached value
                        return self.__numframes
                try:
                        self.__numframes = self._read_trj_numframes(self.filename)
                except IOError:
                        return 0
                else:
                        return self.__numframes

        def _read_trj_numatoms(self, filename):
                raise NotImplementedError("It is not possible to relaibly deduce NATOMS from Amber trj files")

        def _read_trj_numframes(self, filename):
                self._reopen()
                # the number of lines in the XYZ file will be 2 greater than the number of atoms
                linesPerFrame = self.numatoms * 3. / 10.

                counter = 0
                # step through the file (assuming xyzfile has an iterator)
                for i in self.trjfile:
                        counter = counter + 1
                self.close()
                # need to check this is an integer!
                numframes = int(counter/linesPerFrame)
                return numframes

        @property
        def numatoms(self):
                if not self.__numatoms is None:   # return cached value
                        return self.__numatoms
                try:
                        self.__numatoms = self._read_trj_numatoms(self.filename)
                except IOError:
                        return 0
                else:
                        return self.__numatoms

        def __del__(self):
                if not self.trjfile is None:
                        self.close()

        def __len__(self):
                return self.numframes

        def _reopen(self):
                self.close()
                self.open_trajectory()

        def open_trajectory(self):
                """Open the trajectory for reading and load first frame."""
                self.trjfile, filename = util.anyopen(self.filename, 'r')
                self.header = self.trjfile.readline()  # ignore first line
                if len(self.header.rstrip()) > 80:
                        # Chimera uses this check
                        raise OSError("Header of Amber formatted trajectory has more than 80 chars. "
                                      "This is probably not a Amber trajectory.")
                # reset ts
                ts = self.ts
                ts.status = 1
                ts.frame = 0
                ts.step = 0
                ts.time = 0
                return self.trjfile

        def close(self):
                """Close trj trajectory file if it was open."""
                if self.trjfile is None:
                        return
                self.trjfile.close()
                self.trjfile = None


        def rewind(self):
                """Reposition at the beginning of the trajectory"""
                self._reopen()
                self.next()


        def __iter__(self):
                self._reopen()
                #yield self.ts
                #raise StopIteration
                self.ts.frame = 0  # start at 0 so that the first frame becomes 1
                while True:
                        try:
                                yield self._read_next_timestep()
                        except EOFError:
                                self.close()
                                raise StopIteration
